package com.yzq.os.spider.v.controller;

import java.io.IOException;
import java.io.StringWriter;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.http.HttpServletResponse;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.servlet.ModelAndView;
import org.springframework.web.servlet.view.RedirectView;

import com.yzq.os.spider.v.domain.ListPageConfig;
import com.yzq.os.spider.v.domain.SearchEngine;
import com.yzq.os.spider.v.domain.SearchEngineParam;
import com.yzq.os.spider.v.service.domain.ListPageConfigService;
import com.yzq.os.spider.v.service.domain.SearchEngineParamService;
import com.yzq.os.spider.v.service.domain.SearchEngineService;
import com.yzq.os.spider.v.service.inter.OuterSystemService;

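/**
 * Search engine controller: serves the add/modify forms and the listing
 * page for search engines, and exports/imports engine configurations as
 * XML.
 * 
 * @author 苑志强(xingyu_yzq@163.com)
 * 
 */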
@Controller
@RequestMapping("/engine")
public class SearchEngineController extends CommonController {

	/** Log4j logger used by every handler in this controller. */
	private static Logger logger = Logger
			.getLogger(SearchEngineController.class);

	/** Outer-system service; used in this class to obtain the website list. */
	@Autowired
	private OuterSystemService etlSystemService;

	/** Service used to look up, save and update {@link SearchEngine} records. */
	@Autowired
	private SearchEngineService searchEngineService;

	/** Service used to look up, save and delete {@link SearchEngineParam} records. */
	@Autowired
	private SearchEngineParamService searchEngineParamService;

	/** Service used to look up, save and delete {@link ListPageConfig} records. */
	@Autowired
	private ListPageConfigService listPageConfigService;

	/**
	 * Shows the "add search engine" form.
	 * 
	 * The view receives the selectable websites ("marks"), request methods
	 * ("methods"), encodings ("encodes") and spider implementation classes
	 * ("crawlTasks").
	 * 
	 * @return the form view together with its select-list data
	 */
	@RequestMapping("/form")
	public ModelAndView form() {
		ModelAndView page = new ModelAndView("/admin/engine/form");
		page.addObject("marks", etlSystemService.getWebsites());
		page.addObject("methods", SearchEngineService.getMethods());
		page.addObject("encodes", SearchEngineService.getEncodes());
		page.addObject("crawlTasks",
				searchEngineService.getSpiderImplClasses());
		return page;
	}

	/**
	 * Creates or updates a search engine.
	 * 
	 * An engine that already carries an id is updated; one without an id is
	 * saved as a new record.
	 * 
	 * @param searchEngine
	 *            the engine bound from the submitted form
	 * @return a redirect to "engine/form"
	 */
	@RequestMapping(method = RequestMethod.POST)
	public RedirectView save(SearchEngine searchEngine) {
		boolean alreadyPersisted = searchEngine.getId() != null;
		if (alreadyPersisted) {
			searchEngineService.update(searchEngine);
		} else {
			searchEngineService.save(searchEngine);
		}
		RedirectView backToForm = new RedirectView("engine/form");
		return backToForm;
	}

	/**
	 * Lists all search engines.
	 * 
	 * @return the list view with every engine exposed as "searchEngines"
	 */
	@RequestMapping(method = RequestMethod.GET)
	public ModelAndView list() {
		return new ModelAndView("/admin/engine/list", "searchEngines",
				searchEngineService.findAllList());
	}

	/**
	 * Shows the edit form for the search engine with the given id.
	 * 
	 * The same select-list data as the add form is provided, plus the
	 * engine being edited under "searchEngine".
	 * 
	 * @param eId
	 *            id of the search engine to edit
	 * @return the form view populated with the engine and select-list data
	 */
	@RequestMapping("/modify/{eId}")
	public ModelAndView modify(@PathVariable int eId) {
		ModelAndView page = new ModelAndView("/admin/engine/form");
		page.addObject("marks", etlSystemService.getWebsites());
		page.addObject("methods", SearchEngineService.getMethods());
		page.addObject("encodes", SearchEngineService.getEncodes());
		page.addObject("crawlTasks",
				searchEngineService.getSpiderImplClasses());
		page.addObject("searchEngine", searchEngineService.findById(eId));
		return page;
	}

	/**
	 * Shows the page on which engines can be selected for export.
	 * 
	 * @return the export view with the selectable engines under "engines"
	 */
	@RequestMapping(value = "/export")
	public ModelAndView exportXml() {
		return new ModelAndView("/admin/engine/export", "engines",
				searchEngineService.findUIViews());
	}

	/**
	 * Exports the configuration of the selected search engines as an XML
	 * download.
	 * 
	 * The path variable carries the engine ids joined by "_". For each id
	 * the engine, its parameters and its list page configuration are loaded
	 * and serialised into one &lt;config&gt; element. An id that cannot be
	 * parsed or loaded is logged and skipped so the remaining engines are
	 * still exported.
	 * 
	 * The &lt;configs&gt; root element is always written, so the document
	 * stays well-formed even when no id could be exported.
	 * 
	 * @param eIds
	 *            engine ids separated by "_"
	 * @param resp
	 *            response the XML file is written to
	 * @throws DocumentException
	 *             if the assembled XML cannot be parsed for formatting
	 * @throws Exception
	 *             if formatting or writing the response fails
	 */
	@RequestMapping(value = "/export/{eIds}")
	public void doExportXml(@PathVariable String eIds, HttpServletResponse resp)
			throws DocumentException, Exception {
		logger.info("Export config for engineIds:[" + eIds + "]");
		StringBuilder xml = new StringBuilder();
		xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		// The root element is emitted unconditionally: a document holding
		// only the XML declaration has no root and fails to parse below.
		xml.append("<configs>");
		String[] engineIds = StringUtils.isNotBlank(eIds) ? StringUtils.split(
				eIds, "_") : null;
		if (ArrayUtils.isNotEmpty(engineIds)) {
			for (String rawEngineId : engineIds) {
				logger.info("Export config for engineId:[" + rawEngineId
						+ "]");
				try {
					int engineId = Integer.parseInt(rawEngineId);
					SearchEngine searchEngine = searchEngineService
							.findById(engineId);
					List<SearchEngineParam> params = searchEngineParamService
							.findBySearchEngineId(engineId);
					ListPageConfig listPageConfig = listPageConfigService
							.findBySearchEngineId(engineId);
					xml.append(makeSingleConfigXml(searchEngine, params,
							listPageConfig));
				} catch (Exception e) {
					// Best effort: one broken engine must not abort the
					// export of the others.
					logger.error("Read search engine error engineId["
							+ rawEngineId + "]", e);
				}
			}
		}
		xml.append("</configs>");

		String outXml = formatXML(DocumentHelper.parseText(xml.toString()));
		String fileName = "export_" + eIds + "_"
				+ DateFormatUtils.format(new Date(), "yyyy_MM_dd") + ".xml";
		// responseXml is not defined in this class; presumably inherited
		// from CommonController.
		responseXml(resp, fileName, outXml);
	}

	/**
	 * Serialises one search engine (engine + engine parameters + list page
	 * configuration) into a single &lt;config&gt; XML fragment.
	 * 
	 * Every bean property reported by {@link BeanUtils#describe(Object)}
	 * becomes a child element whose text is wrapped in a CDATA section.
	 * 
	 * @param searchEngine
	 *            the engine to export; required
	 * @param params
	 *            the engine's parameters; may be null or empty
	 * @param listPageConfig
	 *            the engine's list page configuration; required
	 * @return the &lt;config&gt; fragment, or an empty string (after logging
	 *         an error) when the engine or the list page configuration is
	 *         missing
	 * @throws IllegalAccessException
	 *             if a bean property cannot be read
	 * @throws InvocationTargetException
	 *             if a bean getter throws
	 * @throws NoSuchMethodException
	 *             if a bean accessor cannot be found
	 */
	private String makeSingleConfigXml(SearchEngine searchEngine,
			List<SearchEngineParam> params, ListPageConfig listPageConfig)
			throws IllegalAccessException, InvocationTargetException,
			NoSuchMethodException {
		if (searchEngine == null || listPageConfig == null) {
			logger.error("Export config must have engine and listpageconfig for searchEngine:["
					+ searchEngine + "]");
			return "";
		}

		StringBuilder xml = new StringBuilder();
		xml.append("<config>");

		xml.append("<engine>");
		appendBeanAsXml(xml, searchEngine);
		xml.append("</engine>");

		xml.append("<engineParams>");
		if (CollectionUtils.isNotEmpty(params)) {
			for (SearchEngineParam param : params) {
				xml.append("<engineParam>");
				appendBeanAsXml(xml, param);
				xml.append("</engineParam>");
			}
		}
		xml.append("</engineParams>");

		xml.append("<pageConfig>");
		appendBeanAsXml(xml, listPageConfig);
		xml.append("</pageConfig>");

		xml.append("</config>");
		return xml.toString();
	}

	/**
	 * Appends one element per bean property to the buffer, with the
	 * property value wrapped in CDATA (null values become empty text).
	 * 
	 * @param xml
	 *            buffer to append to
	 * @param bean
	 *            bean whose properties are written
	 * @throws IllegalAccessException
	 *             if a bean property cannot be read
	 * @throws InvocationTargetException
	 *             if a bean getter throws
	 * @throws NoSuchMethodException
	 *             if a bean accessor cannot be found
	 */
	@SuppressWarnings("unchecked")
	private void appendBeanAsXml(StringBuilder xml, Object bean)
			throws IllegalAccessException, InvocationTargetException,
			NoSuchMethodException {
		Map<String, String> properties = BeanUtils.describe(bean);
		for (Map.Entry<?, ?> property : properties.entrySet()) {
			Object name = property.getKey();
			Object value = property.getValue();
			String text = value != null ? value.toString() : "";
			// A literal "]]>" would terminate the CDATA section early and
			// corrupt the document, so it is split across two sections.
			String safeText = text.replace("]]>", "]]]]><![CDATA[>");
			xml.append('<').append(name).append('>');
			xml.append("<![CDATA[").append(safeText).append("]]>");
			xml.append("</").append(name).append('>');
		}
	}

	/**
	 * Pretty-prints a dom4j document.
	 * 
	 * @param doc
	 *            the document to format
	 * @return the document rendered with dom4j's pretty-print output format
	 * @throws Exception
	 *             if writing the document fails; the failure is logged and
	 *             rethrown so a truncated document is never returned as if
	 *             it were complete
	 */
	private String formatXML(Document doc) throws Exception {
		StringWriter out = new StringWriter();
		XMLWriter writer = new XMLWriter(out,
				OutputFormat.createPrettyPrint());
		try {
			writer.write(doc);
			// Make sure everything has reached the StringWriter before it
			// is read back.
			writer.flush();
		} catch (IOException e) {
			logger.error("Format XML document error", e);
			throw e;
		} finally {
			IOUtils.closeQuietly(out);
		}
		return out.toString();
	}

	/**
	 * Shows the page used to import search engine configurations.
	 * 
	 * @return the import view, without any model data
	 */
	@RequestMapping(value = "/import")
	public ModelAndView importXml() {
		ModelAndView page = new ModelAndView();
		page.setViewName("/admin/engine/import");
		return page;
	}

	/**
	 * Imports search engine configurations from XML into the database.
	 * 
	 * Every //configs/config element is read into a search engine, its
	 * parameters and its list page configuration. When an engine with the
	 * same id already exists it is updated and its parameters and list page
	 * configuration are deleted and re-created; otherwise everything is
	 * saved as new.
	 * 
	 * All config elements are checked for the mandatory &lt;engine&gt; and
	 * &lt;pageConfig&gt; children before anything is written, so a
	 * structurally incomplete document is rejected without a partial
	 * import.
	 * 
	 * @param xml
	 *            the XML text to import
	 * @return "OK" when the import completes
	 * @throws DocumentException
	 *             if the XML is blank, cannot be parsed, or a config element
	 *             lacks its engine or pageConfig child
	 */
	@SuppressWarnings("unchecked")
	@ResponseBody
	@RequestMapping(value = "/import", method = RequestMethod.POST)
	public String doImportXml(String xml) throws DocumentException {
		String returnValue = "OK";
		if (StringUtils.isBlank(xml)) {
			// A missing request parameter arrives as null and would
			// otherwise fail inside the parser with a NullPointerException.
			throw new DocumentException("Import xml must not be blank");
		}
		// NOTE(review): this parses user-supplied XML. Depending on the
		// dom4j version, DocumentHelper.parseText may resolve DTDs and
		// external entities (XXE) - confirm the parser is hardened.
		Document doc = DocumentHelper.parseText(xml);
		List<Node> configs = doc.selectNodes("//configs/config");
		if (CollectionUtils.isEmpty(configs)) {
			return returnValue;
		}

		// Validate the structure of every config up front so nothing is
		// persisted when the document is incomplete.
		for (Node config : configs) {
			if (config.selectSingleNode("engine") == null
					|| config.selectSingleNode("pageConfig") == null) {
				throw new DocumentException(
						"Each config element must contain an engine and a pageConfig element");
			}
		}

		for (Node config : configs) {
			SearchEngine searchEngine = getSearchEngineFromXml(config
					.selectSingleNode("engine"));
			List<Node> engineParams = config
					.selectNodes("engineParams/engineParam");
			List<SearchEngineParam> searchEngineParams = getSearchEngineParamsFromXml(engineParams);
			ListPageConfig listPageConfig = getListPageConfigFromXml(config
					.selectSingleNode("pageConfig"));

			Integer engineId = searchEngine.getId();
			boolean exists = searchEngineService.countById(engineId) > 0;

			if (exists) {
				logger.info("Import conifg eingineId:[" + engineId
						+ "] is in system.");
				logger.info("Update searchEngineId:[" + engineId + "]");
				searchEngineService.update(searchEngine);
				logger.info("Delete have params for eingineId:[" + engineId
						+ "]");
				searchEngineParamService.deleteBySearchEngineId(engineId);
			} else {
				logger.info("Import conifg eingineId:[" + engineId
						+ "] is not in system.");
				logger.info("Save new engine eingineId:[" + engineId + "]");
				searchEngineService.save(searchEngine);
			}

			logger.info("Save new params for eingineId:[" + engineId
					+ "] size:[" + CollectionUtils.size(searchEngineParams)
					+ "]");
			for (SearchEngineParam searchEngineParam : searchEngineParams) {
				searchEngineParamService.save(searchEngineParam);
			}

			if (exists) {
				logger.info("Delete have list page config for eingineId:["
						+ engineId + "]");
				listPageConfigService.deleteBySearchEngineId(engineId);
			}
			logger.info("Save new list page config for eingineId:["
					+ engineId + "]");
			listPageConfigService.save(listPageConfig);
		}
		return returnValue;
	}

	/**
	 * Builds a {@link SearchEngine} from the child elements of an
	 * &lt;engine&gt; dom4j node.
	 * 
	 * @param engine
	 *            the &lt;engine&gt; node
	 * @return a new search engine populated from the node's children
	 */
	private SearchEngine getSearchEngineFromXml(Node engine) {
		SearchEngine searchEngine = new SearchEngine();
		searchEngine.setWebsiteId(getIntegerNodeValue(engine, "websiteId"));
		searchEngine.setEncode(getIntegerNodeValue(engine, "encode"));
		searchEngine.setSpiderTaskClass(getNodeValue(engine,
				"spiderTaskClass"));
		searchEngine.setBeforeSpiderProcClass(getNodeValue(engine,
				"beforeSpiderProcClass"));
		searchEngine.setIsGzip(getIntegerNodeValue(engine, "isGzip"));
		searchEngine.setCreateQueryURLClass(getNodeValue(engine,
				"createQueryURLClass"));
		searchEngine.setLoginClass(getNodeValue(engine, "loginClass"));
		searchEngine.setBaseUrl(getNodeValue(engine, "baseUrl"));
		searchEngine.setCompletionSpiderClass(getNodeValue(engine,
				"completionSpiderClass"));
		searchEngine.setId(getIntegerNodeValue(engine, "id"));
		searchEngine.setUrlEncode(getIntegerNodeValue(engine, "urlEncode"));
		searchEngine.setSleepTime(getIntegerNodeValue(engine, "sleepTime"));
		searchEngine.setName(getNodeValue(engine, "name"));
		searchEngine.setWebsiteName(getNodeValue(engine, "websiteName"));
		searchEngine.setMethod(getIntegerNodeValue(engine, "method"));
		return searchEngine;
	}

	/**
	 * Builds a {@link ListPageConfig} from the child elements of a
	 * &lt;pageConfig&gt; dom4j node.
	 * 
	 * @param pageConfig
	 *            the &lt;pageConfig&gt; node
	 * @return a new list page configuration populated from the node's
	 *         children
	 */
	private ListPageConfig getListPageConfigFromXml(Node pageConfig) {
		ListPageConfig listPageConfig = new ListPageConfig();
		listPageConfig.setCompanyHrefRegex(getNodeValue(pageConfig,
				"companyHrefRegex"));
		listPageConfig.setJobTypeName(getNodeValue(pageConfig, "jobTypeName"));
		listPageConfig.setIndustryName(getNodeValue(pageConfig,
				"industryName"));
		listPageConfig.setCompanyNameRegex(getNodeValue(pageConfig,
				"companyNameRegex"));
		listPageConfig.setJobDatePattern(getNodeValue(pageConfig,
				"jobDatePattern"));
		listPageConfig.setCityName(getNodeValue(pageConfig, "cityName"));
		listPageConfig.setPageSize(getIntegerNodeValue(pageConfig,
				"pageSize"));
		listPageConfig.setMaxRecordNum(getIntegerNodeValue(pageConfig,
				"maxRecordNum"));
		listPageConfig.setSearchEngineId(getIntegerNodeValue(pageConfig,
				"searchEngineId"));
		listPageConfig.setDataRegionRegex(getNodeValue(pageConfig,
				"dataRegionRegex"));
		listPageConfig.setReturnRecordNumRegex(getNodeValue(pageConfig,
				"returnRecordNumRegex"));
		listPageConfig.setNoDataPageRegex(getNodeValue(pageConfig,
				"noDataPageRegex"));
		listPageConfig.setCurrentPageNoName(getNodeValue(pageConfig,
				"currentPageNoName"));
		listPageConfig.setId(getIntegerNodeValue(pageConfig, "id"));
		listPageConfig.setWebsiteName(getNodeValue(pageConfig, "websiteName"));
		listPageConfig.setJobTitleRegex(getNodeValue(pageConfig,
				"jobTitleRegex"));
		listPageConfig.setJobCityRegex(getNodeValue(pageConfig,
				"jobCityRegex"));
		listPageConfig.setSearchEngineName(getNodeValue(pageConfig,
				"searchEngineName"));
		listPageConfig.setJobHrefRegex(getNodeValue(pageConfig,
				"jobHrefRegex"));
		listPageConfig.setJobDateRegex(getNodeValue(pageConfig,
				"jobDateRegex"));
		return listPageConfig;
	}

	/**
	 * Converts a list of &lt;engineParam&gt; dom4j nodes into search engine
	 * parameters.
	 * 
	 * @param engineParams
	 *            the &lt;engineParam&gt; nodes; may be null or empty
	 * @return one parameter per node, in node order; an empty list when no
	 *         nodes are given
	 */
	private List<SearchEngineParam> getSearchEngineParamsFromXml(
			List<Node> engineParams) {
		List<SearchEngineParam> parsedParams = new ArrayList<SearchEngineParam>();
		if (CollectionUtils.isEmpty(engineParams)) {
			return parsedParams;
		}
		for (Node paramNode : engineParams) {
			SearchEngineParam parsed = getSearchEngineParamFromXml(paramNode);
			parsedParams.add(parsed);
		}
		return parsedParams;
	}

	/**
	 * Builds a {@link SearchEngineParam} from the child elements of an
	 * &lt;engineParam&gt; dom4j node.
	 * 
	 * @param engineParam
	 *            the &lt;engineParam&gt; node
	 * @return a new parameter populated from the node's children
	 */
	private SearchEngineParam getSearchEngineParamFromXml(Node engineParam) {
		SearchEngineParam param = new SearchEngineParam();
		param.setId(getIntegerNodeValue(engineParam, "id"));
		param.setSingleValue(getIntegerNodeValue(engineParam, "singleValue"));
		param.setDesc(getNodeValue(engineParam, "desc"));
		param.setName(getNodeValue(engineParam, "name"));
		param.setValue(getNodeValue(engineParam, "value"));
		param.setOrderNum(getIntegerNodeValue(engineParam, "orderNum"));
		param.setSearchEngineId(getIntegerNodeValue(engineParam,
				"searchEngineId"));
		param.setRequired(getIntegerNodeValue(engineParam, "required"));
		return param;
	}

	/**
	 * Reads the string value of a named child of a dom4j node.
	 * 
	 * @param node
	 *            the parent node; must not be null
	 * @param name
	 *            XPath (here: element name) of the child, relative to the
	 *            parent
	 * @return the child's string value, or null when the parent has no such
	 *         child
	 */
	private String getNodeValue(Node node, String name) {
		// selectSingleNode yields null for a missing child; report that as
		// "no value" instead of failing with a NullPointerException.
		Node child = node.selectSingleNode(name);
		return child != null ? child.getStringValue() : null;
	}

	/**
	 * Reads the value of a named child of a dom4j node as an Integer.
	 * 
	 * @param node
	 *            the parent node
	 * @param name
	 *            XPath (here: element name) of the child, relative to the
	 *            parent
	 * @return the parsed number; null when the value is blank; 0 when the
	 *         value is not a valid integer (the behaviour of
	 *         {@link NumberUtils#toInt(String)})
	 */
	private Integer getIntegerNodeValue(Node node, String name) {
		String value = getNodeValue(node, name);
		if (StringUtils.isBlank(value)) {
			return null;
		}
		// Surrounding whitespace (e.g. from a hand-formatted XML file) would
		// make toInt fall back to 0, so strip it before parsing.
		return Integer.valueOf(NumberUtils.toInt(value.trim()));
	}

}
